import pandas as pd
import numpy as np
from scipy.special import expit  # Sigmoid 函数

# —————————————————————————
# 1. 读取全体报道基线数据
# —————————————————————————
full_file = "Monthly_Indices_Full.xlsx"
both = pd.read_excel(full_file, engine="openpyxl")

# 解析 Month 列 (YYYY-MM) 为 PeriodIndex 并设为索引
both['Month'] = pd.to_datetime(both['Month'], format='%Y-%m').dt.to_period('M')
both.set_index('Month', inplace=True)

# —————————————————————————
# 2. 计算“Balance/Impact/Tone”三指标的 σ-NRMSE
# —————————————————————————
metrics = ['BalanceIndex','ImpactIndex','ToneIndex']
baseline_metrics = both[metrics]
stds = baseline_metrics.std()  # 基线标准差

# 读取媒体数据
media_file = "Monthly_Indices_50_Media.xlsx"
xls = pd.ExcelFile(media_file, engine="openpyxl")

records = []
for media in xls.sheet_names:
    dfm = pd.read_excel(xls, sheet_name=media, engine="openpyxl")
    dfm['Month'] = pd.to_datetime(dfm['Month'], format='%Y-%m').dt.to_period('M')
    dfm.set_index('Month', inplace=True)

    df_join = baseline_metrics.join(dfm[metrics], how='inner', rsuffix='_med')
    rec = {'Media': media}

    # 对三项指标逐个计算 σ-NRMSE
    for m in metrics:
        diff = df_join[f'{m}_med'] - df_join[m]
        rmse = np.sqrt((diff**2).mean())
        rec[f'NRMSE_{m}'] = rmse / stds[m] if stds[m] != 0 else np.nan

    records.append(rec)

nrmse_df = pd.DataFrame(records).set_index('Media')

# —————————————————————————
# 3. 对“TotalEvents”做 σ-NRMSE
# —————————————————————————
baseline_events = both['TotalEvents']
sigma_events = baseline_events.std()

ev_records = []
for media in xls.sheet_names:
    dfm = pd.read_excel(xls, sheet_name=media, engine="openpyxl")
    dfm['Month'] = pd.to_datetime(dfm['Month'], format='%Y-%m').dt.to_period('M')
    dfm.set_index('Month', inplace=True)

    aligned = pd.concat([baseline_events, dfm['TotalEvents']], axis=1, join='inner')
    aligned.columns = ['base','med']
    rmse_ev = np.sqrt(((aligned['med'] - aligned['base'])**2).mean())
    ev_records.append({
        'Media': media,
        'NRMSE_EventCount': rmse_ev / sigma_events if sigma_events!=0 else np.nan
    })

ev_nrmse_df = pd.DataFrame(ev_records).set_index('Media')

# —————————————————————————
# 4. 合并所有 NRMSE 指标
# —————————————————————————
all_df = nrmse_df.join(ev_nrmse_df, how='inner')

# —————————————————————————
# 5. Z-Score 标准化＋Sigmoid 映射
# —————————————————————————
mapped = all_df.copy()
for col in all_df.columns:
    mu = all_df[col].mean()
    sd = all_df[col].std()
    z = (all_df[col] - mu) / sd
    mapped[col.replace('NRMSE_', 'SigmoidDeviation_')] = expit(z)

# —————————————————————————
# 6. 输出结果
# —————————————————————————
# 最终保留 Sigmoid 映射后的四列
output_cols = [
    'SigmoidDeviation_BalanceIndex',
    'SigmoidDeviation_ImpactIndex',
    'SigmoidDeviation_ToneIndex',
    'SigmoidDeviation_EventCount'
]
mapped[output_cols].to_excel("Media_4Metrics_SigmoidDeviation.xlsx")

print("✅ 已生成：Media_4Metrics_SigmoidDeviation.xlsx")
